home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
Tech Arsenal 1
/
Tech Arsenal (Arsenal Computer).ISO
/
tek-02
/
sp12src.zip
/
WORDS.PAS
< prev
Wrap
Pascal/Delphi Source File
|
1991-03-28
|
17KB
|
501 lines
{$A+,B-,D-,E-,F-,G-,I-,L-,N-,O-,R-,S-,V+,X+}
{$M 6144,8192,655360}
Program Words;
{ WORDS - A word extracter program. Copyright 1990,91 by Edwin T. Floyd. }
Uses Dos, Crt, Token, PairHeap;
Const
WordChar = ['a'..'z','A'..'Z']; { Default WordSet }
DefaultOutput = ''; { Default output filename (''=stdout) }
BufSize = 4096; { I/O buffer size }
Type
SetOpType = (Union, Intersection, Complement);
SetOfChar = Set Of Char;
SortEntryType = Object(HeapEntry)
{ Data structure used for sorting }
Token : Word;
End;
SortHeapType = Object(Heap)
{ PairHeap compare function override }
Function Less(Var x, y : HeapEntry) : Boolean; Virtual;
End;
FileEntryPtr = ^FileEntry;
FileEntry = Record
{ Input file name list entry }
NextFile : FileEntryPtr;
FileName : PathStr;
End;
Const
FileList : FileEntryPtr = Nil; { File name list (head) }
LastFile : FileEntryPtr = Nil; { File name list (tail) }
HashTab : PToken = Nil; { Hash table pointer }
TestTab : PToken = Nil; { Test hash table pointer }
WordCount : LongInt = 0; { Total number of words examined }
ReturnCode : Word = 0; { Return code for Halt }
WordSet : SetOfChar = WordChar; { Words are made of these }
SetOp : SetOpType = Union; { Set operation }
Alphabetize : Boolean = False; { If true, sort output words }
LowerCase : Boolean = False; { If true, case is significant }
HighOrder : Boolean = False; { If true, clear high-order bits }
SuppressOutput : Boolean = False; { If true, do not write output file }
OutOfMemory : Boolean = False; { Set true by HandleHeapError }
Aborted : Boolean = False; { True if operator aborted }
OutName : PathStr = DefaultOutput; { Output file name }
Var
OldMem : LongInt; { Original value of MemAvail }
SortHeap : SortHeapType; { Sorter object }
TextFile : File; { Input/Output file }
TextBuf : Array[1..BufSize] Of Char; { I/O buffer for TextFile }
{$S+}
Function ProcessParameter(s : String) : Boolean; Forward;
Function ParseParamString(s : String) : Boolean;
{ Extract parameters from a string and process them; return True if all OK. }
Var
i, j : Word;
ParamsOk : Boolean;
Begin
ParamsOk := True;
While (s <> '') And (s[Length(s)] = ' ') Do Dec(s[0]);
While s <> '' Do Begin
i := 1;
While (i <= Length(s)) And (s[i] = ' ') Do Inc(i);
j := Succ(i);
While (j <= Length(s)) And (s[j] <> ' ') Do Inc(j);
If Not ProcessParameter(Copy(s, i, j - i)) Then ParamsOk := False;
Delete(s, 1, Pred(j));
End;
ParseParamString := ParamsOk;
End;
Function ProcessParameter(s : String) : Boolean;
{ Process command line parameter or file name; return True if OK. }
Var
ThisFile : FileEntryPtr;
IncludeFile : Text;
ParamOk : Boolean;
i, j : Word;
IoRes : Integer;
Procedure GetFiles(Var s : String);
Var
Path : PathStr;
Dir : DirStr;
Name : NameStr;
Ext : ExtStr;
Search : SearchRec;
Begin
Path := FExpand(s);
FSplit(Path, Dir, Name, Ext);
FindFirst(Path, Archive, Search);
If DosError <> 0 Then Begin
WriteLn('No files match ', s);
ParamOk := False;
End;
While DosError = 0 Do Begin
Path := Dir + Search.Name;
ThisFile := FileList;
While (ThisFile <> Nil) And (ThisFile^.FileName <> Path) Do
ThisFile := ThisFile^.NextFile;
If ThisFile = Nil Then Begin
New(ThisFile);
If ThisFile <> Nil Then Begin
With ThisFile^ Do Begin
NextFile := Nil;
FileName := Path;
End;
If LastFile = Nil Then FileList := ThisFile
Else LastFile^.NextFile := ThisFile;
LastFile := ThisFile;
End;
End Else WriteLn('Already in list: ', Path);
FindNext(Search);
End;
End;
Begin
ParamOk := True;
If (s[1] = '-') Or (s[1] = '/') Then Case UpCase(s[2]) Of
'U' : SetOp := Union;
'I' : SetOp := Intersection;
'C' : SetOp := Complement;
'A' : If s[3] = '-' Then Alphabetize := False Else Alphabetize := True;
'L' : If s[3] = '-' Then LowerCase := False Else LowerCase := True;
'H' : If s[3] = '-' Then HighOrder := False Else HighOrder := True;
'O' : Begin
Delete(s, 1, 2);
For i := 1 To Length(s) Do s[i] := UpCase(s[i]);
If (s <> '') And ((s[1] = '-') Or (s = 'NUL')) Then Begin
SuppressOutput := True;
OutName := '-';
End Else Begin
SuppressOutput := False;
If s = '' Then OutName := s Else OutName := FExpand(s);
End;
End;
'W' : Begin
Delete(s, 1, 2);
Case s[1] Of
'+' : ;
'-' : WordSet := [];
Else Begin
WriteLn('WordSet (-W) option must be followed by + or -.');
ParamOk := False;
End;
End;
Delete(s, 1, 1);
For i := 1 To Length(s) Do
WordSet := WordSet + [s[i]];
End;
Else Begin
WriteLn('Unrecognized option: ', s);
ParamOk := False;
End;
End Else If s[1] = '@' Then Begin
Delete(s, 1, 1);
For i := 1 To Length(s) Do s[i] := UpCase(s[i]);
Assign(IncludeFile, s);
Reset(IncludeFile);
IoRes := IoResult;
If IoRes = 0 Then Begin
WriteLn('Processing include file ', s);
Repeat
ReadLn(IncludeFile, s);
IoRes := IoResult;
If IoRes = 0 Then If Not ParseParamString(s) Then ParamOk := False;
Until Eof(IncludeFile) Or (IoRes <> 0);
If IoRes <> 0 Then Begin
WriteLn('Error ', IoRes, ' reading include file');
ParamOk := False;
End;
Close(IncludeFile);
IoRes := IoResult;
End Else Begin
WriteLn('Error ', IoRes, ' opening include file ', s);
ParamOk := False;
End;
End Else GetFiles(s);
ProcessParameter := ParamOk;
End;
Procedure ParseParams;
{ Interpret environment and command line parameters; display Help info. }
Var
i, j : Word;
ParamsOk : Boolean;
Ch : Char;
s : String;
Begin
WriteLn('WORDS v1.2 - A word extractor program. Copyright (c) 1990,91 by Edwin T. Floyd.');
ParamsOk := True;
If Not ParseParamString(GetEnv('WORDS')) Then Begin
WriteLn('Error found in SET WORDS=.. environment string');
ParamsOk := False;
End;
For i := 1 To ParamCount Do Begin
FillChar(s[1], 255, ' ');
s := ParamStr(i);
If Not ProcessParameter(s) Then ParamsOk := False;
End;
If Not ParamsOk Then Begin
WriteLn('At least one parameter was in error. Run WORDS with no parameters');
WriteLn('to see documentation.');
Halt(1);
End Else If FileList = Nil Then Begin
WriteLn;
WriteLn(' WORDS filenames.. [-U/-I/-C] [-A] [-L] [-H] [-W[+/-]abc..] [-Oname] [@name]' );
WriteLn;
WriteLn('All command line parameters are separated by spaces. Input text filenames');
WriteLn('and options may be intermixed; options are distinguished by a leading hyphen:');
WriteLn;
WriteLn(' -U, -I or -C specifies the set operation to be performed on the extracted');
WriteLn(' words from the files. The operations are:');
WriteLn(' -U Union: Keep all unique words from any input file (default);');
WriteLn(' -I Intersection: Keep unique words common to all files;');
WriteLn(' -C Complement: Keep unique words from second and subsequent files only');
WriteLn(' if they are not contained in the first file.');
WriteLn(' -A[-] Sort output words alphabetically (default off).');
WriteLn(' -H[-] Clear high-order bits on input file (i.e. WordStar, default off).');
WriteLn(' -L[-] Lower case is significant (default off).');
WriteLn(' -W-abc.. Replace the word character set with the indicated characters');
WriteLn(' (default is all alphabetic characters, upper and lower case).');
WriteLn(' -W+abc.. Add additional characters to the word character set.');
WriteLn(' -O[name] Name the output file (default is name omitted => stdout).');
WriteLn(' -O- Suppress output (counts are still displayed on screen).');
WriteLn;
WriteLn('The "@" prefixes the name of an ASCII include file which may contain');
Write('filenames, options, and nested include files, in any order. ');
Ch := ReadKey;
WriteLn;
WriteLn;
WriteLn('You may use the DOS "SET" command to specify default parameters. Examples:');
WriteLn;
WriteLn(' SET WORDS=-U -A+ -L+ -Owords.out -W-ABCDEFGHIJKLMNOPQRSTUVWXYZ');
WriteLn(' SET WORDS=@defaults.wrd -O');
WriteLn;
WriteLn('Command line parameters override "SET" parameters. WORDS examples:');
WriteLn;
WriteLn(' WORDS oldwords.lst document.txt -W+-'' -C -Onewwords.lst');
WriteLn(' WORDS @filename.lst -I -Oallwords.txt');
WriteLn(' WORDS file1.txt -A+ -U -L- -O | nextprog');
WriteLn;
WriteLn('WORDS was written by:');
WriteLn;
WriteLn(' Edwin T. Floyd [76067,747] (CompuServe)');
WriteLn(' #9 Adams Park Court 404/576-3305 (work)');
WriteLn(' Columbus, GA 31909 404/322-0076 (home)');
Halt(0);
End Else Begin
Case SetOp Of
Union : s := '-U';
Intersection : s := '-I';
Complement : s := '-C';
End;
If Alphabetize Then ch := '+' Else ch := '-';
s := s + ' -A' + ch;
If LowerCase Then ch := '+' Else ch := '-';
s := s + ' -L' + ch;
If HighOrder Then ch := '+' Else ch := '-';
s := s + ' -H' + ch;
OldMem := MemAvail;
WriteLn('Options: ', s, ' -O', OutName, ', ',
OldMem Shr 10, 'k free.');
WriteLn('Press <Esc> to stop.');
End;
End;
{$S-}
Function SortHeapType.Less(Var x, y : HeapEntry) : Boolean;
{ Sort compare function override }
Var
xx : SortEntryType Absolute x;
yy : SortEntryType Absolute y;
Begin
Less := HashTab^.TokenAddress(xx.Token)^ < HashTab^.TokenAddress(yy.Token)^;
End;
Function ParseInputBlock(Len : Word) : Word;
{ Insert words from input block into hash table }
Var
Words : Word;
t : TokenString;
i, Toss : Word;
Begin
i := 1;
Words := 0;
While i <= Len Do Begin
t := '';
While (i <= Len) And Not (TextBuf[i] In WordSet) Do Inc(i);
If i <= Len Then Begin
While (i <= Len) And (Length(t) < TokenStringSize)
And (TextBuf[i] In WordSet) Do Begin
Inc(t[0]);
If LowerCase Then t[Ord(t[0])] := TextBuf[i]
Else t[Ord(t[0])] := UpCase(TextBuf[i]);
Inc(i);
End;
Inc(Words);
Case SetOp Of
Union : Toss := HashTab^.TokenInsertText(t);
Intersection : If (TestTab <> Nil) And (TestTab^.TextToken(t) <> 0) Then
Toss := HashTab^.TokenInsertText(t);
Complement : If (TestTab <> Nil) And (TestTab^.TextToken(t) = 0) Then
Toss := HashTab^.TokenInsertText(t);
End;
End;
End;
ParseInputBlock := Words;
End;
Procedure ProcessNextFile;
{ Open and process the next input file pointed to by FileList. }
Var
ThisFile : FileEntryPtr;
TempTab : PToken;
FileWords : LongInt;
i, MaxLen, Len : Word;
FileResult : Integer;
Begin
ThisFile := FileList;
With ThisFile^ Do Begin
Write(FileName, ': ');
Assign(TextFile, FileName);
Reset(TextFile, 1);
FileResult := IoResult;
If FileResult = 0 Then Begin
If HashTab = Nil Then New(HashTab, Init);
Len := 0;
FileWords := 0;
Repeat
BlockRead(TextFile, TextBuf[Succ(Len)], BufSize-Len, i);
FileResult := IoResult;
If FileResult = 0 Then Begin
MaxLen := Len + i;
If HighOrder Then For i := Succ(Len) To MaxLen Do
TextBuf[i] := Chr(Ord(TextBuf[i]) And $7F);
Len := MaxLen;
If Not Eof(TextFile) Then Begin
While (Len > 0) And (TextBuf[Len] In WordSet) Do Dec(Len);
If (Len = 0) Then Len := MaxLen;
End;
FileWords := FileWords + ParseInputBlock(Len);
MaxLen := MaxLen - Len;
If MaxLen > 0 Then
Move(TextBuf[Succ(Len)], TextBuf[1], MaxLen);
Len := MaxLen;
Write(^M, FileName, ': ', FileWords, ' words, ',
HashTab^.TokMaxToken, ' kept, ', (OldMem-MemAvail) Shr 10, 'k');
While KeyPressed Do If ReadKey = ^[ Then Aborted := True;
End;
Until Eof(TextFile) Or (FileResult <> 0) Or OutOfMemory Or Aborted;
Close(TextFile);
WriteLn(^M, FileName, ': ', FileWords, ' words, ',
HashTab^.TokMaxToken, ' kept, ', (OldMem-MemAvail) Shr 10, 'k');
WordCount := WordCount + FileWords;
End Else WriteLn('Unable to open input file ', FileName);
If FileResult <> 0 Then Begin
WriteLn('Error ', FileResult);
Inc(ReturnCode);
End;
FileList := NextFile;
If SetOp = Intersection Then Begin
TempTab := TestTab;
TestTab := HashTab;
HashTab := TempTab;
If HashTab <> Nil Then Begin
Dispose(HashTab, Done);
HashTab := Nil;
End;
End;
End;
Dispose(ThisFile);
End;
Procedure ProcessFirstFile;
{ Process the first input file. }
Var
TempTab : PToken;
Op : SetOpType;
Begin
Op := SetOp;
SetOp := Union;
ProcessNextFile;
SetOp := Op;
If SetOp In [Intersection, Complement] Then Begin
TempTab := TestTab;
TestTab := HashTab;
HashTab := TempTab;
End;
End;
Procedure SortWords;
{ Write words to output file, optionally sorted. }
Var
SortEntry : ^SortEntryType;
FileResult : Integer;
i : Word;
OutFile : Text;
Begin
If SuppressOutput Then WriteLn('Output suppressed') Else Begin
Assign(OutFile, OutName);
SetTextBuf(OutFile, TextBuf);
ReWrite(OutFile);
FileResult := IoResult;
If FileResult = 0 Then Begin
If Alphabetize Then With SortHeap Do Begin
Init;
For i := 1 To HashTab^.TokMaxToken Do Begin
New(SortEntry);
If SortEntry <> Nil Then Begin
SortEntry^.Token := i;
Insert(SortEntry^);
End;
End;
If OutOfMemory Then Begin
WriteLn('Sort suppressed due to insufficient memory');
Alphabetize := False;
Inc(ReturnCode);
End;
End;
If Alphabetize Then With SortHeap Do Begin
Write('Sorting and writing ', EntryCount, ' words to ');
If OutName = '' Then Write('<stdout>') Else Write(OutName);
WriteLn(', ', (OldMem-MemAvail) Shr 10, 'k');
For i := 1 To EntryCount Do Begin
SortEntry := DeleteLowEntry;
If FileResult = 0 Then Begin
WriteLn(OutFile, HashTab^.TokenAddress(SortEntry^.Token)^);
FileResult := IoResult;
End;
End;
End Else Begin
Write('Writing ', HashTab^.TokMaxToken, ' words to ');
If OutName = '' Then WriteLn('<stdout>') Else WriteLn(OutName);
For i := 1 To HashTab^.TokMaxToken Do If FileResult = 0 Then Begin
WriteLn(OutFile, HashTab^.TokenAddress(i)^);
FileResult := IoResult
End;
End;
If FileResult <> 0 Then Begin
WriteLn('Error ', FileResult, ' writing file ', OutName);
Inc(ReturnCode);
End;
Close(OutFile);
FileResult := IoResult;
If FileResult <> 0 Then Begin
WriteLn('Error ', FileResult, ' closing file ', OutName);
Inc(ReturnCode);
End;
End Else WriteLn('Error ', FileResult, ' opening file ', OutName);
End;
End;
{$F+}
Function HandleHeapError(Size : Word) : Integer;
Begin
If Size > 0 Then Begin
HandleHeapError := 1;
OutOfMemory := True;
End;
End;
{$F-}
Begin
FileMode := $40;
HeapError := @HandleHeapError;
OldMem := MemAvail;
ParseParams;
ProcessFirstFile;
While (FileList <> Nil) And Not (OutOfMemory Or Aborted) Do ProcessNextFile;
If OutOfMemory Then Begin
WriteLn('Input file processing terminated due to insufficient memory');
WriteLn('Words collected so far will be written to output file');
Inc(ReturnCode);
End;
If Aborted Then Begin
WriteLn('File processing aborted by operator');
SuppressOutput := True;
Inc(ReturnCode);
End;
If SetOp = Intersection Then Begin
HashTab := TestTab;
TestTab := Nil;
End Else If Alphabetize And Not SuppressOutput Then Begin
WriteLn('Maximizing free memory for sort');
If TestTab <> Nil Then Dispose(TestTab, Done);
TestTab := Nil;
End;
WriteLn('Final Counts: ', WordCount, ' words examined, ',
HashTab^.TokMaxToken, ' kept, ', (OldMem-MemAvail) Shr 10, 'k in use');
OutOfMemory := False;
SortWords;
WriteLn('Done!');
Halt(ReturnCode);
End.